drive <- "E:"
code.dir <- paste(drive, "STAT5703/Code", sep="/")
data.dir <- paste(drive, "STAT5703/Data", sep="/")
   #
source(paste(code.dir, "3drotations.r", sep = "/"))
source(paste(code.dir, "creategrid.r", sep = "/"))
source(paste(code.dir, "confusion_ex.r", sep = "/"))
source(paste(code.dir, "creategaussians.r", sep="/"))
source(paste(code.dir, "example_display.r", sep = "/"))
   #
library(nnet)
library(mda)

   #
   # This is a simple neural net example that
   # classifies a logical AND. That is
           #  x   y   
           #  0 & 0 = 0
           #  0 & 1 = 0
           #  1 & 0 = 0
           #  1 & 1 = 1
   # This can be done with a simple TLU.


#===================================================
# Compute delta.w
#===================================================
comp.delta.w <- function (input, w, class, rate){
   activation <- t(input)%*%w;       # [x1,x2,-1].[w1,w2,thresh]
      # if activation > threshold  y <- 1 (fire)
   y <- (activation > 0)*1
   factor <- rate*(class - y);
   cat(formatC(c(w, input, activation, y, class, factor, factor*input,
                 1-(class==y)), format="f",
               digit=4, width=7), "\n")
   return(list(delta=factor*input, Err=1-(class==y)))
}

#===================================================
# Plot and update the weights
#===================================================
f.update.weights <- function (inputs, w, target, rate, XR, YR){
  Total.Error <- 0	
  for (i in 1:(length(inputs[,1]))){  
      plot(XR, YR, type = "n", 
           main=paste(floor(w[1]*100+.5)/100, floor(w[2]*100+.5)/100,
             floor(w[3]*100+.5)/100),
           xlab="", ylab="")
      mtext(paste("b=",floor(w[3]/w[2]*100+.5)/100,
                  " m=",floor(w[1]/w[2]*100)/100),side=1, line=0.5, cex=.8)
      points (inputs[,1], inputs[,2], col=target+2, pch=19, cex=1.2)
      plot.line (w)
      points(inputs[i,1], inputs[i,2], col="blue", pch="x", cex=2)
      delta.w <- comp.delta.w(inputs[i,], w, target[i], rate)
      w <- w + delta.w$delta
      Total.Error <- Total.Error + delta.w$Err
  }
  return (list(w=w, Err=Total.Error))
}

#===================================================
# Plot a coloured line 
#===================================================
plot.line <- function (w){
  if (w[2] != 0){
    inter <- w[3]/w[2]
    slope <- -w[1]/w[2]
    abline (inter,slope, col="red")
  }
  else {  # vertical line at xx
    xx <- w[3]/w[1]
    abline (v=xx, col="red")
  }
}

#===================================================
#  Main function 
#===================================================
f.AND <- function(inputs, class, w, rate, XR, YR) {
      # Now use up to 200 training cycles to get the weights
   T.Err <- 0
   for (i in (1:200)) {
        # Display the inputs (coloured by class)
     cat("  w1      w2      b0      x1      x2    -1       act.     y",
            "     target  delta    dw1     dw2     db     \n")
     old.w <- w
     res <- f.update.weights (inputs, w, class, rate, XR, YR)
     w <- res$w
     cat("weights = ",w," old ",old.w, "Err = ", res$Err, "\n")
     if (res$Err == 0) {
        break
     }		 
     ret <- readline("Press Enter...")
     if (ret == "q") break
  }
}

   # Data for problem
rate <- 0.25;     # Learning rate
x1 <- c(0,0,-1)
x2 <- c(0,1,-1)
x3 <- c(1,0,-1)
x4 <- c(1,1,-1)

graphics.off()
oldpar <- par(mfrow = c(5,4),mar=c(2,1,2,1),xaxt="n",yaxt="n")
f.AND(rbind(x1,x2,x3,x4), c(0,0,0,1), c(-.2, 0.2, -0.5), rate,
      -.5:1.5, -.5:1.5)
      
par(oldpar)


   # Gradient  Form 
comp.delta.w <- function (input.i, w, target, rate){
   activation <- t(input.i)%*%w
         # As before [x1,x2,x3].[w1,w2,w3] = [x1,x2,-1].[w1,w2,b]
   factor <- rate*(target - activation)
   cat(formatC(c(w, input.i, activation, target,  factor, 
                 factor*input.i, 0.5*(target-activation)^2), 
               format="f", digit=4,width=7), "\n")
   return(list(delta=factor*input.i, Err=0.5*(target-activation)^2))
}

#===================================================
#  Main function 
#===================================================
f2.AND.G <- function(inputs, class, w, rate, XR, YR) {
      # Now use up to 200 training cycles to get the weights
   T.Err <- 9999999
   for (i in (1:200)) {
        # Display the inputs (coloured by class)
     cat("  w1      w2      b       x1      x2    -1        act.",
            "  target  delta    dw1     dw2     db     \n")
     old.w <- w
     res <- f.update.weights (inputs, w, class, rate, XR, YR)
     w <- res$w
     cat("weights = ",w," old ",old.w,"\n")
     cat("Err = ", res$Err, "\n")
     if (abs(res$Err - T.Err) > 0.01*res$Err) {
        T.Err <- res$Err
        ret <- readline("Press Enter...")
        if (ret == "q") break
      } else {
        break
     }		 
  }
}

x1 <- c(0,0,-1)
x2 <- c(0,1,-1)
x3 <- c(1,0,-1)
x4 <- c(1,1,-1)

graphics.off()
oldpar <- par(mfrow = c(5,4), mar=c(2,1,2,1), xaxt="n", yaxt="n")
f2.AND.G(rbind(x1,x2,x3,x4), c(-1,-1,-1,1), c(-.2, 0.2, -0.5), rate,
       -.5:1.5,-.5:1.5)
       
par(oldpar)


   # NNET FLEA
source(paste(code.dir, "ReadFleas.r", sep="/"))
t(class.ind(flea.species))
   #
apply(class.ind(flea.species),2,sum)

   #
#============================================ 
# A function to produce those indices NOT in a set.
# Use this to get the test sample.
#============================================ 
"%w/o%" <- function(x,y) x[!x %in% y]

#============================================ 
# Set the indices for the training/test sets
#============================================ 
get.train <- function (data.sz, train.sz) {
      # Take subsets of data for training/test samples
      # Return the indices
   train.ind <- sample(data.sz, train.sz)
   test.ind <- (1:data.sz) %w/o% train.ind
   list(train=train.ind, test=test.ind)
}


   #
Train.sz <- 52
   # Get the indices for the training and test samples
(tt.ind <- get.train(dim(df.flea)[1], Train.sz))
   #
tt.ind$train <- c(52,61,13,12,7,8,33,40,16,4,22,60,58,23,6,9,66,21,48,65,38,
                  5,3,41,27,42,73,71,32,43,74,50,51,1,34,47,69,45,56,28,10,
                  67,24,37,68,18,2,62,54,64,57,19)
tt.ind$test <- c(11,14,15,17,20,25,26,29,30,31,35,36,39,44,46,49,53,55,59,63,70,72)
   #
apply(class.ind(flea.species[tt.ind$train]),2,sum)
apply(class.ind(flea.species[tt.ind$test]),2,sum)

   # NN with no hidden layer
flea.nn <- nnet(df.flea[tt.ind$train,], class.ind(flea.species[tt.ind$train]),
                size=0, skip=T, rang=0.1, decay=5e-4, maxit=500)

summary(flea.nn)

flea.nn[11]

   #
t(flea.nn$fitted.values)

t(predict(flea.nn, df.flea[tt.ind$train,]))  

max.col(flea.nn$fitted.values)

confusion.expand(max.col(flea.nn$fitted.values), flea.species[tt.ind$train])

   #
predict(flea.nn, df.flea[tt.ind$test,])

confusion.expand(max.col(predict(flea.nn, df.flea[tt.ind$test,])),flea.species[tt.ind$test])


    # 1 Hidden
flea.nn <- nnet(df.flea[tt.ind$train,], class.ind(flea.species[tt.ind$train]),
                size=1, rang=0.1, decay=5e-4, maxit=500)
summary(flea.nn)
confusion.expand(max.col(flea.nn$fitted.values), flea.species[tt.ind$train])

   #
df.flea[1,]

   #
(i.case.1 <- c(1, as.matrix(df.flea[1,])))
(w.i.h <- flea.nn$wts[1:7])
   #
(to.h <- sum(i.case.1*w.i.h))
(from.h <- 1/(1+exp(-to.h)))
   #
(w.h.o1 <- flea.nn$wts[8:9])
(w.h.o2 <- flea.nn$wts[10:11])
(w.h.o3 <- flea.nn$wts[12:13])
   #
(to.o1 <- sum(c(1, from.h)*w.h.o1))
(to.o2 <- sum(c(1, from.h)*w.h.o2))
(to.o3 <- sum(c(1, from.h)*w.h.o3))
   #
from.o1 <- 1/(1+exp(-to.o1))
from.o2 <- 1/(1+exp(-to.o2))
from.o3 <- 1/(1+exp(-to.o3))
   #
cat(from.o1, from.o2, from.o3, "\n")

predict(flea.nn, df.flea[1,])

   #
confusion.expand(max.col(predict(flea.nn, df.flea[tt.ind$test,])),flea.species[tt.ind$test])

   # Try again
flea.nn <- nnet(df.flea[tt.ind$train,], class.ind(flea.species[tt.ind$train]), size=1, rang=0.1, decay=5e-4, maxit=500)
confusion(max.col(flea.nn$fitted.values), flea.species[tt.ind$train])

   # What is happening...
   # The following uses just 7 weights (bias & 2 variables) 
targets <- class.ind(c(rep("C", 5), rep("Hb", 4)))
plot(df.flea[1:9,1],df.flea[1:9, 2], pch = targets+1)

flea.nn <- nnet(df.flea[1:9, 1:2], targets, size = 1, rang = 0.1, decay = 5e-4, maxit=200)

   #
flea.nn <- nnet(df.flea[1:9, 1:2], targets, size = 1, rang = 0.1, decay = 5e-4, maxit=200)

   # 0
library(rgl)
z <- matrix(0, 81, 81)
x<-1:81
y<-1:81
for (i in x) {
   for (j in y) {
      mywts <- c((i-41)/200, (j-41)/200, 0, 0, 0, 0, 0) 
      flea.nn <- nnet(df.flea[1:9,1:2], targets, Wts=mywts, size=1,
                      rang=0.1, decay=5e-4, maxit=200,trace=F)
      z[i,j] <- flea.nn$value
      cat("x = ",i," y = ",j," min = ", flea.nn$value," wts = ",floor(1000*flea.nn$wts)/1000,"\n")
   }
}

persp3d(x,y,z, col = "lightblue")
contour((x-41)/200,(y-41)/200,z, nlevels=10,col=c(1,2,3,4,5,6,7,8,9,10))
   #
range(z)

   #   0.5
for (i in x) {
   for (j in y) {
      mywts <- c((i-41)/200, (j-41)/200, 0.5, 0.5, 0.5, 0.5, 0.5) 
      flea.nn <- nnet(df.flea[1:9,1:2], targets, Wts=mywts, size=1, rang=0.1,
                      decay=5e-4, maxit=200, trace=F)
      z[i,j] <- flea.nn$value
#      cat("x = ",i," y = ",j," min = ", flea.nn$value," wts = ",
#          floor(1000*flea.nn$wts)/1000,"\n")
   }
}

persp3d(x,y,z, col = "lightblue")
contour((x-41)/200,(y-41)/200,z, nlevels=10,col=c(1,2,3,4,5,6,7,8,9,10))
range(z)

   #  1
for (i in x) {
   for (j in y) {
      mywts <- c((i-41)/200, (j-41)/200, 1, 1, 1, 1, 1) 
      flea.nn <- nnet(df.flea[1:9,1:2], targets, Wts=mywts, size=1, rang=0.1,
                      decay=5e-4, maxit=200, trace=F)
      z[i,j] <- flea.nn$value
#      cat("x = ",i," y = ",j," min = ", flea.nn$value," wts = ",
#          floor(1000*flea.nn$wts)/1000,"\n")
   }
}

persp3d(x,y,z, col = "lightblue")
contour((x-41)/200,(y-41)/200,z, nlevels=10,col=c(1,2,3,4,5,6,7,8,9,10))
range(z)

   #
for (i in x) {
   for (j in y) {
      mywts <- c(0, (i-41)/200, (j-41)/200, 0, 0, 0, 0) 
      flea.nn <- nnet(df.flea[1:9,1:2], targets, Wts=mywts, size=1, rang=0.1,
                      decay=5e-4, maxit=2000, trace=F)
      z[i,j] <- flea.nn$value
#      cat("x = ",i," y = ",j," min = ", flea.nn$value," wts = ",
#          floor(1000*flea.nn$wts)/1000,"\n")
   }
}

persp3d(x,y,z, col = "lightblue")
contour((x-41)/200,(y-41)/200,z, nlevels=10,col=c(1,2,3,4,5,6,7,8,9,10))
range(z)

   #
for (i in x) {
   for (j in y) {
      mywts <- c(1, (i-41)/200, (j-41)/200, 1, 1, 1, 1) 
      flea.nn <- nnet(df.flea[1:9,1:2], targets, Wts=mywts, size=1, rang=0.1,
                      decay=5e-4, maxit=200, trace=F)
      z[i,j] <- flea.nn$value
#      cat("x = ",i," y = ",j," min = ", flea.nn$value," wts = ",
#          floor(1000*flea.nn$wts)/1000,"\n")
   }
}

persp3d(x,y,z, col = "lightblue")
contour((x-41)/200,(y-41)/200,z, nlevels=10,col=c(1,2,3,4,5,6,7,8,9,10))
range(z)

   #
for (i in x) {
   for (j in y) {
      mywts <- c(0, 0, 0, (i-41)/200, (j-41)/200, 1, 1) 
      flea.nn <- nnet(df.flea[1:9,1:2], targets, Wts=mywts, size=1, rang=0.1,
                      decay=5e-4, maxit=200, trace = F)
      z[i,j] <- flea.nn$value
#      cat("x = ",i," y = ",j," min = ", flea.nn$value," wts = ",
#          floor(1000*flea.nn$wts)/1000,"\n")
   }
}

persp3d(x,y,z, col = "lightblue")
contour((x-41)/200,(y-41)/200,z, nlevels=10,col=c(1,2,3,4,5,6,7,8,9,10))
range(z)




   ###### Synthetic

   #=================================================
   # Function to do the work of calling the plotting and nn routines
   #=================================================
load(paste(data.dir, "syn.Rdata", sep="/"))
load(paste(data.dir, "synTrainTest.Rdata", sep="/"))
   #
syn.train.class <- res$tp[tt.ind[[1]],]
syn.train.data <- res$data[tt.ind[[1]],]
   #
syn.test.class <- res$tp[tt.ind[[2]],]
syn.test.data <- res$data[tt.ind[[2]],]


model.exp <- expression(nnet(data.frame(train.data), class.ind(train.class), skip=T,
            softmax = T, size = extra, decay = 5e-4, maxit = 1000, trace = F))
predict.exp <- expression(as.integer(predict(model, points, type = "class")))


f.menu <- function(){
   while (TRUE) {
      cat("Enter the number of the data set","\n")
      cat ("0 to quit\n")
      resN <- menu(c("1:2", "1/3:2/4", "1:2/3:4", "1:2 rotated", "1/3:2/4 rotated",
                     "1:2/3:4 rotated", "Hole", "y^2"))
      if (resN == 0) break
      size <- readline("Number of nodes - 0 to 20: ")
      example.display(syn.train.data, syn.train.class[,resN], syn.test.data,
            syn.test.class[,resN],  c(100,100), paste("NN -", size), model.exp,
            predict.exp, extra = as.integer(size))
   }
}

oldpar <- par(mfrow = c(2,2), mar=c(2,2,2,2))
f.menu()

par(oldpar)

p.hat <- function(object, X) {
   X <- as.matrix(X)
   term <- cbind(rep(1,dim(X)[1]), X)%*%as.matrix(object$coefficients)
   1/(1+exp(-term))
}

model.exp <- expression(nnet(data.frame(train.data), class.ind(train.class), skip=T,
       softmax = T, size = extra, decay = 5e-4, maxit = 1000, trace = F))
predict.exp <- expression(as.integer(predict(model, points, type="class")))


load(paste(data.dir, "cloud2distinct.rdata", sep = "/"))
data.1 <- temp$c.1
sz.1   <- temp$s.1
data.2 <- temp$c.2
sz.2   <- temp$s.2
   # Combine and plot

G.1.data <- rbind(data.1, data.2)
dimnames(G.1.data) <- list(NULL, c("X", "Y"))

oldpar <- par (mfrow = c(2, 2), mar = c(2, 2, 2, 2))

size = 0
example.display(G.1.data, c(rep(0, sz.1), rep(1, sz.2)), {}, {},
            c(100,100), paste("NN -", size, "Distinct Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))

size = 1
example.display(G.1.data, c(rep(0, sz.1), rep(1, sz.2)), {}, {},
            c(100,100), paste("NN -", size, "Distinct Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))

size = 5
example.display(G.1.data, c(rep(0, sz.1), rep(1, sz.2)), {}, {},
            c(100,100), paste("NN -", size, "Distinct Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))

size = 10
example.display(G.1.data, c(rep(0, sz.1), rep(1, sz.2)), {}, {},
            c(100,100), paste("NN -", size, "Distinct Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))
par(oldpar)

   #==================================
   # Cloud 3
load(paste(data.dir, "cloud2lap.rdata", sep = "/"))

data.3 <- temp$c.3
sz.3   <- temp$s.3
data.4 <- temp$c.4
sz.4   <- temp$s.4
   # Combine and plot
G.2.data <- rbind(data.3, data.4)
dimnames(G.2.data) <- list(NULL, c("X", "Y"))

oldpar <- par (mfrow = c(2, 2), mar = c(2, 2, 2, 2))

size = 0
example.display(G.2.data, c(rep(0, sz.3), rep(1, sz.4)), {}, {},
            c(100,100), paste("NN -", size, "Overlapping Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))

size = 1
example.display(G.2.data, c(rep(0, sz.3), rep(1, sz.4)), {}, {},
            c(100,100), paste("NN -", size, "Overlapping Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))


size = 5
example.display(G.2.data, c(rep(0, sz.3), rep(1, sz.4)), {}, {},
            c(100,100), paste("NN -", size, "Overlapping Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))

size = 10
example.display(G.2.data, c(rep(0, sz.3), rep(1, sz.4)), {}, {},
            c(100,100), paste("NN -", size, "Overlapping Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))
par(oldpar)

oldpar <- par (mfrow = c(2, 2), mar = c(2, 2, 2, 2))
size = 10
example.display(G.2.data, c(rep(0, sz.3), rep(1, sz.4)), {}, {},
            c(100,100), paste("NN -", size, "Overlapping Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))
size = 10
example.display(G.2.data, c(rep(0, sz.3), rep(1, sz.4)), {}, {},
            c(100,100), paste("NN -", size, "Overlapping Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))
size = 10
example.display(G.2.data, c(rep(0, sz.3), rep(1, sz.4)), {}, {},
            c(100,100), paste("NN -", size, "Overlapping Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))
size = 10
example.display(G.2.data, c(rep(0, sz.3), rep(1, sz.4)), {}, {},
            c(100,100), paste("NN -", size, "Overlapping Clouds"),
            model.exp, predict.exp, extra = size, c(-20, 40, -20, 40))

par(oldpar)


   # Cloud 5
load(paste(data.dir, "cloud5.rdata", sep = "/"))

sz.5   <- temp$s.5
data.5 <- temp$c.5

G.3.data <- rbind(data.3, data.4, data.5)
dimnames(G.3.data) <- list(NULL, c("X", "Y"))
   #

oldpar <- par (mfrow = c(2, 2), mar = c(2, 2, 2, 2))

size = 0
example.display(G.3.data, c(rep(0, sz.3), rep(1, sz.4), rep(2, sz.5)), {}, {},
            c(100,100), paste("NN -", size, "  3 clouds"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 1
example.display(G.3.data, c(rep(0, sz.3), rep(1, sz.4), rep(2, sz.5)), {}, {},
            c(100,100), paste("NN -", size, "  3 clouds"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))


size = 5
example.display(G.3.data, c(rep(0, sz.3), rep(1, sz.4), rep(2, sz.5)), {}, {},
            c(100,100), paste("NN -", size, "  3 clouds"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 10
example.display(G.3.data, c(rep(0, sz.3), rep(1, sz.4), rep(2, sz.5)), {}, {},
            c(100,100), paste("NN -", size, "  3 clouds"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))
par(oldpar)

   # 3 clouds
load(paste(data.dir, "cloud3.rdata", sep = "/"))

data.6 <- tmp$C.1
data.7 <- tmp$C.2
data.8 <- tmp$C.3
C.6 <- tmp$K1 - 1
C.7 <- tmp$K2 - 1
C.8 <- tmp$K3 - 1
data.123 <- data.frame(rbind(data.6, data.7, data.8))

oldpar <- par (mfrow = c(2, 2), mar = c(2, 2, 2, 2))

size = 0
example.display(data.123, c(C.6, C.7, C.8), {}, {},
            c(100,100), paste("NN -", size, "  3 cloud triangle"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 1
example.display(data.123, c(C.6, C.7, C.8), {}, {},
            c(100,100), paste("NN -", size, "  3 cloud triangle"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 5
example.display(data.123, c(C.6, C.7, C.8), {}, {},
            c(100,100), paste("NN -", size, "  3 cloud triangle"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 10
example.display(data.123, c(C.6, C.7, C.8), {}, {},
            c(100,100), paste("NN -", size, "  3 cloud triangle"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

par(oldpar)

            #############################

load(paste(data.dir, "ldaqdacovdiff.rdata", sep = "/"))

X.1 <- cov.diff$X1
n.1 <- length(X.1[ ,1])
colnames(X.1) <- c("x1", "x2")
X.2 <- cov.diff$X2
n.2 <- length(X.2[ ,1])
colnames(X.2) <- c("x1", "x2")
C.1 <- rep(0, n.1)
C.2 <- rep(1, n.2)
X.12 <- data.frame(rbind(X.1, X.2))

X.3 <- cov.diff$X3
n.3 <- length(X.3[,1])
colnames(X.3) <- c("x1", "x2")
X.4 <- cov.diff$X4
n.4 <- length(X.4[,1])
colnames(X.4) <- c("x1", "x2")
C.3 <- rep(0, n.3)
C.4 <- rep(1, n.4)
X.34 <- data.frame(rbind(X.3, X.4))

oldpar <- par (mfrow = c(2, 2), mar = c(2, 2, 2, 2))
size = 0
example.display(X.12, c(C.1, C.2), {}, {},
            c(100,100), paste("NN -", size, "Equal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 1
example.display(X.12, c(C.1, C.2), {}, {},
            c(100,100), paste("NN -", size, "Equal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 5
example.display(X.12, c(C.1, C.2), {}, {},
            c(100,100), paste("NN -", size, "Equal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 10
example.display(X.12, c(C.1, C.2), {}, {},
            c(100,100), paste("NN -", size, "Equal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))
par(oldpar)


oldpar <- par (mfrow = c(2, 2), mar = c(2, 2, 2, 2))
size = 0
example.display(X.34, c(C.3, C.4), {}, {},
            c(100,100), paste("NN -", size, "Unequal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 1
example.display(X.34, c(C.3, C.4), {}, {},
            c(100,100), paste("NN -", size, "Unequal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 5
example.display(X.34, c(C.3, C.4), {}, {},
            c(100,100), paste("NN -", size, "Unequal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 10
example.display(X.34, c(C.3, C.4), {}, {},
            c(100,100), paste("NN -", size, "Unequal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))
par(oldpar)

X.5 <- Make.Gaussian(300, -12, 15, 2, 1, pi/4)
X.6 <- Make.Gaussian(500, 12, -15, 8, 1, -pi/4)
X.56 <- data.frame(rbind(X.5, X.6))
C.5 <- rep(0, 300)
C.6 <- rep(1, 500)

oldpar <- par (mfrow = c(2, 2), mar = c(2, 2, 2, 2))

size = 0
example.display(X.56, c(C.5, C.6), {}, {},
            c(100,100), paste("NN -", size, "Unequal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 1
example.display(X.56, c(C.5, C.6), {}, {},
            c(100,100), paste("NN -", size, "Unequal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 5
example.display(X.56, c(C.5, C.6), {}, {},
            c(100,100), paste("NN -", size, "Unequal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))

size = 10
example.display(X.56, c(C.5, C.6), {}, {},
            c(100,100), paste("NN -", size, "Unequal Cov"),
            model.exp, predict.exp, extra = size, c(-40, 40, -40, 40))
par(oldpar)

x11()
example.display(X.56, c(C.5, C.6), {}, {},
                  c(100,100), "Logistic Regression",
                  expression(glm(factor(train.class)~train.data[,1]+train.data[,2],
                             quasibinomial(link = "logit"))),
                  expression((p.hat(model, points) > 0.5)*1), c(-40, 40, -40, 40))







   ######### Weight Decay ########

Wts <- c(-0.076,0.034,-0.046,0.076,-0.056,0.024,0.029,-0.002,-0.013,0.096,
         0.02,-0.1,-0.03,0.094,0.079,-0.077,0.092,0.021,-0.1,-0.029,0.067,
         -0.005,-0.066,-0.036,0.07,-0.045,-0.073,-0.05,-0.002,0.006,0.026,
         -0.027,-0.004,-0.01,-0.067,0.073,-0.028,0.016,-0.002,0.048,-0.004,
         -0.06,0,0.085,-0.084,0.012,-0.076,-0.048,-0.088,-0.029,0.017,0.076)


wt.decay.demo <- function(train.data, train.class, size, decay, Wts) {
  plot.class(train.data, train.class, {}, {}, paste("Set = ",7, " Size = ",size," Decay = ", decay))
     #
  model <-nnet(data.frame(train.data), class.ind(train.class),
               skip=F, softmax = T, size=size, decay=decay, maxit = 1000,
               trace = F, Wts=Wts)
  res <- f.create.grid(train.data, c(100,100))
  xp <- res$xp
  yp <- res$yp
  zp <- res$zp
  dimnames(zp)[[2]]<- dimnames(train.data)[[2]]
  Z <- as.integer(predict(model, zp, type="class"))
  points(zp[,1], zp[,2], pch=".", col= (matrix(Z, length(xp), length(yp))+2))
  for (c in 1:2) {
    contour(xp, yp, matrix(as.numeric(Z==(c-1)), length(xp), length(yp)), 
            add=T, levels=0.5, labex=0, col=1)
  }
  model$wts
}


wt.decay.demo(syn.train.data, syn.train.class[,7], 10, 0, Wts)

wt.decay.demo(syn.train.data, syn.train.class[,7], 10, 0.0005, Wts)

wt.decay.demo(syn.train.data, syn.train.class[,7], 10, 0.02, Wts)

   #### 3D ######
source(paste(code.dir, "display3D.r", sep = "/"))


     ################################
first <- 5
second <- 3
third <- 2
data.for.3d <- data.frame(d.flea[,first], d.flea[,second], d.flea[,third])
colnames(data.for.3d) <- colnames(d.flea[, c(first, second, third)])

Wts <- c(-0.99,10.71,-4.93,0.95,-15.76,-1.8,0.37,0.16,-15.34,13.5,11.89,10.6,-8.59,-13.63,-9.84,-6.11,12.75)
flea.nn.3d.2 <- nnet(data.for.3d, class.ind(flea.species),
                size = 2, skip = F, rang = 0.1, decay = 5e-4, maxit = 500, Wts=Wts)

summary(flea.nn.3d.2)

confusion.expand(max.col(flea.nn.3d.2$fitted.values), flea.species)

flea.nn.3d.2[11]

   # Get the change points
cols <- c(2, 4, 3)
res <- disp.3d(flea.nn.3d.2, data.for.3d, "net", 60)
data.3d.2 <- res$changePts
library(rgl)
plot3d(d.flea[,c(first, second, third)], type = "s", col = species + 1, size=0.5)
points3d(data.3d.2, size=2)
   # Use every 4th point
points3d(res$classes[(1:(nrow(res$classes)/4))*4, 1:3],
         col = cols[res$classes[(1:(nrow(res$classes)/4))*4 , 4]], size = 0.1)

   ############# 10
Wts <- c(16.54,2.17,-0.53,-0.17,-0.01,0.05,0.07,0.18,7.36,2.86,2.32,-1.3,17.5,
         -0.87,1.29,-0.54,-1.03,5.59,-0.82,-0.2,0,0.01,0.05,0.14,1.78,-3.16,
         -10.6,4.61,-5.24,5.15,6.69,-3.25,-6.26,0.58,8.4,-3.23,-2.99,8.87,
         -4.52,0.98,-4.44,-12.29,-4.47,-3.67,-15.14,1.44,-1.77,7.25,11.69,
         13.3,8.87,0.71,12.26,0.74,4.37,14.72,8.72,-0.75,-7.52,-11.99,-13.7,
         -8.6,0.76,-0.38,0.76,-0.65,1.88,-7.29,0.84,1.77,-0.55,0.73,-4.73)

flea.nn.3d.10 <- nnet(data.for.3d, class.ind(flea.species),
                size=10, skip=F, rang=0.1, decay=5e-4, maxit=500, Wts=Wts)

summary(flea.nn.3d.10)

confusion.expand(max.col(flea.nn.3d.10$fitted.values), flea.species)

flea.nn.3d.10[11]

   # Get the change points
res <- disp.3d(flea.nn.3d.10, data.for.3d, "net", 60)
data.3d.10 <- res$changePts

plot3d(d.flea[,c(first, second, third)], type="s", col = species+1, size=0.5)
points3d(data.3d.10, size = 2)
points3d(res$classes[(1:(nrow(res$classes)/4))*4, 1:3],
         col = cols[res$classes[(1:(nrow(res$classes)/4))*4 , 4]], size = 0.1)